This data set has been designed by Josh Mannix as a part of JOUR 301.
The data shows the relationship between NFL players posting record-setting yardage numbers and the number of opportunities they receive.
Some of the more interesting takeaways…
While rushing numbers and rushing attempts had an obvious relationship for running backs, it did not translate to receivers or quarterbacks. Many of the receivers put up big numbers with far fewer receptions than 13. On the other end, there were many QBs with far more attempts than 57, while very few ever eclipsed the 500-yard mark.
The other trend that I noticed was the rise in passing numbers over the past 2 decades. In the early 2000s it was relatively rare to see a passer with over 57 attempts, but recently it has been far more common.
Data Retrieved From https://github.com/rfordatascience/tidytuesday/blob/master/data/2018/2018-08-28/nfl_2010-2017.csv
library(tidyverse)
library(tidytuesdayR)
# Fetch the TidyTuesday release dated 2018-08-28 and extract the table stored
# under the name `nfl_2010-2017`.
NFL_Data <- tidytuesdayR::tt_load("2018-08-28")
nfl_stats <- NFL_Data$`nfl_2010-2017`

# Print a column-by-column preview of nfl_stats (output follows).
glimpse(nfl_stats)
Rows: 81,525
Columns: 23
$ ...1 <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17…
$ name <chr> "Duce Staley", "Lamar Smith", "Tiki Barber", "Stephen Dav…
$ team <chr> "PHI", "MIA", "NYG", "WAS", "IND", "BAL", "NYJ", "MIN", "…
$ game_year <dbl> 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 200…
$ game_week <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ rush_att <dbl> 26, 27, 13, 23, 28, 27, 30, 14, 15, 10, 20, 13, 23, 14, 2…
$ rush_yds <dbl> 201, 145, 144, 133, 124, 119, 110, 109, 88, 87, 84, 80, 7…
$ rush_avg <dbl> 7.7, 5.4, 11.1, 5.8, 4.4, 4.4, 3.7, 7.8, 5.9, 8.7, 4.2, 6…
$ rush_tds <dbl> 1, 1, 2, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 3, …
$ rush_fumbles <dbl> 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 1, 1, …
$ rec <dbl> 4, 1, 3, 4, 6, 4, 6, 2, 2, NA, 4, 3, 1, 4, 1, 1, 1, NA, N…
$ rec_yds <dbl> 61, 12, 25, 37, 40, 32, 34, 3, 20, NA, 29, 10, -2, 100, 1…
$ rec_avg <dbl> 15.3, 12.0, 8.3, 9.3, 6.7, 8.0, 5.7, 1.5, 10.0, NA, 7.3, …
$ rec_tds <dbl> 0, 0, 0, 0, 1, 0, 1, 0, 0, NA, 0, 0, 0, 1, 0, 0, 0, NA, N…
$ rec_fumbles <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, NA, 0, 0, 0, 0, 0, 0, 0, NA, N…
$ pass_att <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 41, NA, NA, NA, NA, N…
$ pass_yds <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 290, NA, NA, NA, NA, …
$ pass_tds <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 2, NA, NA, NA, NA, NA…
$ int <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, NA, NA, NA, NA…
$ sck <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 2, NA, NA, NA, NA, NA…
$ pass_fumbles <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, NA, NA, NA, NA…
$ rate <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 102.7, NA, NA, NA, NA…
$ position <chr> "RB", "RB", "RB", "RB", "RB", "RB", "RB", "RB", "RB", "QB…
# Rows with more than 200 rushing yards, reduced to the player name, the
# yardage, and every column whose name starts with "game".
hi_rsh <- nfl_stats %>%
  filter(rush_yds > 200) %>%
  select(name, rush_yds, starts_with("game"))

# Number of such rows per game_year.
count(hi_rsh, game_year)
# A tibble: 17 × 2
game_year n
<dbl> <int>
1 2000 10
2 2001 2
3 2002 4
4 2003 3
5 2004 2
6 2005 3
7 2006 4
8 2007 3
9 2008 2
10 2009 5
# … with 7 more rows
# Print a column-by-column preview of nfl_stats (output follows).
glimpse(nfl_stats)
Rows: 81,525
Columns: 23
$ ...1 <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17…
$ name <chr> "Duce Staley", "Lamar Smith", "Tiki Barber", "Stephen Dav…
$ team <chr> "PHI", "MIA", "NYG", "WAS", "IND", "BAL", "NYJ", "MIN", "…
$ game_year <dbl> 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 200…
$ game_week <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ rush_att <dbl> 26, 27, 13, 23, 28, 27, 30, 14, 15, 10, 20, 13, 23, 14, 2…
$ rush_yds <dbl> 201, 145, 144, 133, 124, 119, 110, 109, 88, 87, 84, 80, 7…
$ rush_avg <dbl> 7.7, 5.4, 11.1, 5.8, 4.4, 4.4, 3.7, 7.8, 5.9, 8.7, 4.2, 6…
$ rush_tds <dbl> 1, 1, 2, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 3, …
$ rush_fumbles <dbl> 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 1, 1, …
$ rec <dbl> 4, 1, 3, 4, 6, 4, 6, 2, 2, NA, 4, 3, 1, 4, 1, 1, 1, NA, N…
$ rec_yds <dbl> 61, 12, 25, 37, 40, 32, 34, 3, 20, NA, 29, 10, -2, 100, 1…
$ rec_avg <dbl> 15.3, 12.0, 8.3, 9.3, 6.7, 8.0, 5.7, 1.5, 10.0, NA, 7.3, …
$ rec_tds <dbl> 0, 0, 0, 0, 1, 0, 1, 0, 0, NA, 0, 0, 0, 1, 0, 0, 0, NA, N…
$ rec_fumbles <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, NA, 0, 0, 0, 0, 0, 0, 0, NA, N…
$ pass_att <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 41, NA, NA, NA, NA, N…
$ pass_yds <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 290, NA, NA, NA, NA, …
$ pass_tds <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 2, NA, NA, NA, NA, NA…
$ int <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, NA, NA, NA, NA…
$ sck <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 2, NA, NA, NA, NA, NA…
$ pass_fumbles <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, NA, NA, NA, NA…
$ rate <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 102.7, NA, NA, NA, NA…
$ position <chr> "RB", "RB", "RB", "RB", "RB", "RB", "RB", "RB", "RB", "QB…
# Rows with more than 500 passing yards, reduced to the player name, the
# yardage, and every column whose name starts with "game".
hi_pass <- nfl_stats %>%
  filter(pass_yds > 500) %>%
  select(name, pass_yds, starts_with("game"))

# Number of such rows per game_year.
count(hi_pass, game_year)
# A tibble: 10 × 2
game_year n
<dbl> <int>
1 2000 1
2 2006 1
3 2009 1
4 2011 1
5 2012 2
6 2013 1
7 2014 1
8 2015 2
9 2016 2
10 2017 1
# Print a column-by-column preview of nfl_stats (output follows).
glimpse(nfl_stats)
Rows: 81,525
Columns: 23
$ ...1 <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17…
$ name <chr> "Duce Staley", "Lamar Smith", "Tiki Barber", "Stephen Dav…
$ team <chr> "PHI", "MIA", "NYG", "WAS", "IND", "BAL", "NYJ", "MIN", "…
$ game_year <dbl> 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 200…
$ game_week <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ rush_att <dbl> 26, 27, 13, 23, 28, 27, 30, 14, 15, 10, 20, 13, 23, 14, 2…
$ rush_yds <dbl> 201, 145, 144, 133, 124, 119, 110, 109, 88, 87, 84, 80, 7…
$ rush_avg <dbl> 7.7, 5.4, 11.1, 5.8, 4.4, 4.4, 3.7, 7.8, 5.9, 8.7, 4.2, 6…
$ rush_tds <dbl> 1, 1, 2, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 1, 3, …
$ rush_fumbles <dbl> 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 2, 0, 1, 1, …
$ rec <dbl> 4, 1, 3, 4, 6, 4, 6, 2, 2, NA, 4, 3, 1, 4, 1, 1, 1, NA, N…
$ rec_yds <dbl> 61, 12, 25, 37, 40, 32, 34, 3, 20, NA, 29, 10, -2, 100, 1…
$ rec_avg <dbl> 15.3, 12.0, 8.3, 9.3, 6.7, 8.0, 5.7, 1.5, 10.0, NA, 7.3, …
$ rec_tds <dbl> 0, 0, 0, 0, 1, 0, 1, 0, 0, NA, 0, 0, 0, 1, 0, 0, 0, NA, N…
$ rec_fumbles <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, NA, 0, 0, 0, 0, 0, 0, 0, NA, N…
$ pass_att <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 41, NA, NA, NA, NA, N…
$ pass_yds <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 290, NA, NA, NA, NA, …
$ pass_tds <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 2, NA, NA, NA, NA, NA…
$ int <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, NA, NA, NA, NA…
$ sck <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 2, NA, NA, NA, NA, NA…
$ pass_fumbles <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 0, NA, NA, NA, NA, NA…
$ rate <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 102.7, NA, NA, NA, NA…
$ position <chr> "RB", "RB", "RB", "RB", "RB", "RB", "RB", "RB", "RB", "QB…
# Rows with more than 200 receiving yards, reduced to the player name, the
# receiving yardage, and every column whose name starts with "game".
hi_rec <- nfl_stats %>%
  filter(rec_yds > 200) %>%
  select(name, rec_yds, starts_with("game"))

# Number of such rows per game_year.
hi_rec %>%
  count(game_year)
# NOTE(review): the printed table that follows was produced when this chunk
# selected pass_yds and counted hi_pass; re-run the chunk to refresh it.
# A tibble: 10 × 2
game_year n
<dbl> <int>
1 2000 1
2 2006 1
3 2009 1
4 2011 1
5 2012 2
6 2013 1
7 2014 1
8 2015 2
9 2016 2
10 2017 1
Information about the dataset…
# Title, axis, and size labels for the rushing bubble chart.
labs_grp_bubble <- labs(
  title = "Rushing Attempts Leaders",
  x = "Year", y = "Name",
  size = "Rushing Yards"
)

# One point per row of nfl_stats with more than 33 rushing attempts, placed
# by game_year (x) and player name (y).
ggp2_grp_bubble <- filter(nfl_stats, rush_att > 33) |>
  ggplot(aes(x = game_year, y = name)) +
  geom_point(
    # Point size follows rush_yds. Fill colour is set via `fill` inside
    # aes(); here it is mapped to name.
    aes(size = rush_yds, fill = name),
    show.legend = FALSE,
    alpha = 2 / 3,
    shape = 21,
    color = "black"
  ) +
  scale_size(range = c(1, 7), name = "rushing yards") +
  ggthemes::theme_few(base_size = 11)

ggp2_grp_bubble +
  labs_grp_bubble
The size of the bubble is relative to the rushing yards in games with more than 33 attempts.
# Title and axis labels for the rushing scatter plot.
labs_scatter <- labs(
  title = "Highest Rushers",
  x = "game_year", y = "name"
)

# One point per row of nfl_stats with more than 200 rushing yards.
ggp2_scatter <- filter(nfl_stats, rush_yds > 200) |>
  ggplot(aes(x = game_year, y = name)) +
  geom_point()

ggp2_scatter +
  labs_scatter

# Title, axis, and size labels for the receptions bubble chart.
labs_grp_bubble <- labs(
  title = "Receptions >13",
  x = "Year", y = "Name",
  size = "Receiving Yards"
)

# One point per row of nfl_stats with more than 13 receptions, placed by
# game_year (x) and player name (y).
ggp2_grp_bubble <- filter(nfl_stats, rec > 13) |>
  ggplot(aes(x = game_year, y = name)) +
  geom_point(
    # Point size follows rec_yds (the receiving counterpart of the rush_yds
    # mapping used in the rushing chart). Fill colour is mapped to name.
    aes(size = rec_yds, fill = name),
    show.legend = FALSE,
    alpha = 2 / 3,
    shape = 21,
    color = "black"
  ) +
  scale_size(range = c(1, 7), name = "receiving yards") +
  ggthemes::theme_few()

ggp2_grp_bubble +
  labs_grp_bubble
The size of the bubble is relative to the receiving yards in games with more than 13 receptions.
# Title and axis labels for the receiving scatter plot.
labs_scatter <- labs(
  title = "Highest Receivers",
  x = "game_year", y = "name"
)

# One point per row of nfl_stats with more than 200 receiving yards.
ggp2_scatter <- filter(nfl_stats, rec_yds > 200) |>
  ggplot(aes(x = game_year, y = name)) +
  geom_point()

ggp2_scatter +
  labs_scatter

# Title, axis, and size labels for the pass-attempt bubble chart.
labs_grp_bubble <- labs(
  title = "Pass Attempt Leaders",
  x = "Year", y = "Name",
  size = "Passing Yards"
)

# One point per row of nfl_stats with more than 57 pass attempts, placed by
# game_year (x) and player name (y).
ggp2_grp_bubble <- filter(nfl_stats, pass_att > 57) |>
  ggplot(aes(x = game_year, y = name)) +
  geom_point(
    # Point size follows pass_yds (the passing counterpart of the rush_yds
    # mapping used in the rushing chart). Fill colour is mapped to name.
    aes(size = pass_yds, fill = name),
    show.legend = FALSE,
    alpha = 2 / 3,
    shape = 21,
    color = "black"
  ) +
  scale_size(range = c(1, 7), name = "passing yards") +
  ggthemes::theme_few()

ggp2_grp_bubble +
  labs_grp_bubble
The size of the bubble is relative to the passing yards in games with more than 57 pass attempts.
# Title and axis labels for the passing scatter plot.
labs_scatter <- labs(
  title = "Highest Passers",
  x = "game_year", y = "name"
)

# One point per row of nfl_stats with more than 500 passing yards, placed by
# game_year (x) and player name (y).
ggp2_scatter <- filter(nfl_stats, pass_yds > 500) |>
  ggplot(aes(x = game_year, y = name)) +
  geom_point()

ggp2_scatter +
  labs_scatter